File set-up

Set working directory to current directory

if (rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

Loads standard libraries and resolve conflicts

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(conflicted)
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package
conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package
conflict_prefer("slice", "dplyr")
## [conflicted] Will prefer dplyr::slice over any other package
conflict_prefer("rename", "dplyr")
## [conflicted] Will prefer dplyr::rename over any other package
conflict_prefer('intersect', 'dplyr')
## [conflicted] Will prefer dplyr::intersect over any other package

Load specific libraries

library(UpSetR)

Set figure theme

mytheme = theme_bw(base_size = 10) + 
  theme(text = element_text(size=10, colour='black'),
        title = element_text(size=10, colour='black'),
        line = element_line(size=0.5),
        axis.title = element_text(size=10, colour='black'),
        axis.text = element_text(size=10, colour='black'),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(size=0.5),
        strip.background = element_blank(),
        strip.text.y = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = c(0.8, 0.8), 
        legend.text=element_text(size=10)) 

mytheme_discrete_x = mytheme + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

# a colorblind-friendly palette 
# colorblind.palette.grey = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

Read data and order

all_circ = read_tsv('../data/Supplementary_Table_2_all_circRNAs.txt')
## Rows: 1137099 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (15): chr, strand, cell_line, tool, circ_id, circ_id_strand, count_group...
## dbl  (6): start, end, BSJ_count, n_detected, n_db, estim_len_in
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cq = read_tsv('../data/Supplementary_Table_3_selected_circRNAs.txt')
## Rows: 1560 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (24): chr, strand, circ_id, circ_id_strand, tool, cell_line, FWD_primer,...
## dbl (20): start, end, FWD_pos, FWD_length, REV_pos, REV_length, FWD_TM, REV_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
val = read_tsv('../data/Supplementary_Table_4_precision_values.txt')
## Rows: 29 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (2): tool, count_group
## dbl (14): nr_qPCR_total, nr_qPCR_fail, nr_qPCR_val, nr_RR_total, nr_RR_fail,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
val$tool = factor(val$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2",  "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE",  "circRNA_finder", "segemehl")) 

cq
val
all_circ

Figure 3A

val_longer = val %>% pivot_longer(cols = c(perc_qPCR_val, perc_RR_val, perc_amp_val),
                     names_to = 'val_type', values_to = 'perc')  %>%
  left_join(val %>% select(tool, count_group, nr_qPCR_total, nr_RR_total, nr_amp_total) %>%
              rename(perc_qPCR_val = nr_qPCR_total, perc_RR_val = nr_RR_total, perc_amp_val = nr_amp_total) %>%
              pivot_longer(cols = c(perc_qPCR_val, perc_RR_val, perc_amp_val), values_to = 'nr_in_group', 
                           names_to = "val_type")) %>%
  select(tool, count_group, val_type, perc, nr_in_group) %>%
  mutate(margin = qnorm(0.975)*sqrt(perc*(1-perc)/nr_in_group),
         CI_low = perc - margin,
         CI_up = perc + margin) %>%
  mutate(count_group == ifelse(count_group == "count < 5", 'BSJ count < 5', count_group),
         count_group == ifelse(count_group == "count ≥ 5", 'BSJ count ≥ 5', count_group),
         count_group == ifelse(count_group == "no_counts", 'no BSJ count', count_group)) 
## Joining, by = c("tool", "count_group", "val_type")
val_longer$tool = factor(val_longer$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2",  "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE",  "circRNA_finder", "segemehl")) 
val_longer$val_type = factor(val_longer$val_type, levels = c('perc_qPCR_val', 'perc_RR_val', 'perc_amp_val'))

val_longer %>%
  ggplot(aes(tool, perc, fill = count_group)) +
  geom_bar(stat = 'identity') +
  geom_errorbar(aes(ymin=CI_low, ymax=CI_up), 
                width=.3, color = 'grey45') +
  facet_grid(scales = 'free_x', space = "free", 
             rows = vars(val_type), cols = vars(count_group)) +
  scale_y_continuous(labels = scales::percent_format(), breaks = c(0,0.25, 0.5, 0.75, 1)) +
  mytheme_discrete_x +
  #theme(strip.text.y = element_text(size = 10), legend.position = "none") +
  theme(legend.position = "none") +
  scale_fill_manual(values = c('#00B9F2', '#E69F00' , '#999999')) +
  xlab('') +
  ylab('')

#ggsave('separate_figures/figure_3A.pdf',  width = 21, height = 13, units = "cm")

Figure 3B

upset

upset = list()
upset['qPCR_val'] = cq %>% filter(qPCR_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_val'] = cq %>% filter(RR_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_val'] = cq %>% filter(amp_seq_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['qPCR_fail'] = cq %>% filter(qPCR_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_fail'] = cq %>% filter(RR_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_fail'] = cq %>% filter(amp_seq_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_out_of_range'] = cq %>% filter(is.na(RR_val)) %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_not_included'] = cq %>% filter(is.na(amp_seq_val)) %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()


# pdf(file="separate_figures/figure_3B_upset_small.pdf", onefile=FALSE,
#     width = 8, height = 2.5) 
upset(fromList(upset), 
      order.by = "freq",
      sets = c('qPCR_val', 'RR_val', 'amp_seq_val', 'qPCR_fail', 'RR_fail', 'amp_seq_fail', 'RR_out_of_range', 'amp_seq_not_included'),
      mainbar.y.label = "number of circRNAs", sets.x.label = "number of circRNAs",
      keep.order = TRUE,
      number.angles = 30,
      point.size = 2.5, line.size = 1)

# dev.off()

Figure 3C & Sup Figure 21

add total nr of circRNAs and calculate theoretical nr of TP

val_cl = val %>%
  left_join(all_circ %>%
              group_by(cell_line, tool) %>%
              summarise(n = n()) %>%
              pivot_wider(names_from = cell_line,
                          values_from = n)) %>%
  select(tool, count_group, perc_compound_val, HLF, `NCI-H23`, SW480) %>%
  mutate(HLF = perc_compound_val * HLF,
         `NCI-H23` = perc_compound_val * `NCI-H23`,
         SW480 = perc_compound_val * SW480) %>%
  pivot_longer(cols = c(HLF, `NCI-H23`, SW480),
               values_to = "theo_TP_all",
               names_to = "cell_line") %>%
  left_join(all_circ %>%
              group_by(cell_line, tool) %>%
              summarise(total_n_ut = n()))
## `summarise()` has grouped output by 'cell_line'. You can override using the
## `.groups` argument.
## Joining, by = "tool"
## `summarise()` has grouped output by 'cell_line'. You can override using the
## `.groups` argument.
## Joining, by = c("tool", "cell_line")
val_cl
val_cl$tool = factor(val_cl$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2",  "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE",  "circRNA_finder", "segemehl")) 


val_cl %>%
  #filter(cell_line == 'HLF') %>%
  ggplot() +
  geom_bar(aes(tool, total_n_ut), stat = "identity", fill = 'grey') +
  geom_bar(aes(tool, theo_TP_all, fill = count_group), stat = "identity") +
  facet_wrap(~cell_line+count_group, scales = 'free') +
  #facet_wrap(~count_group, scales = 'free') +
  mytheme_discrete_x +
  scale_fill_manual(values = c('#00B9F2', '#E69F00' , '#999999')) +
  xlab('') +
  ylab('') +
  theme(legend.position = NULL) +
  scale_y_continuous(labels = scales::comma_format()) +
  theme(legend.position="none")

#ggsave('sup_figure_21.pdf',  width = 21, height = 24.5, units = "cm")
# ggsave('separate_figures/figure_3C_alt2.pdf',  width = 21, height = 8, units = "cm")